

<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta name="Description" content="scikit-learn: machine learning in Python">

  
  <title>Topic extraction with Non-negative Matrix Factorization and Latent Dirichlet Allocation &mdash; scikit-learn 0.22 documentation</title>
  
  <link rel="canonical" href="http://scikit-learn.org/stable/auto_examples/applications/plot_topics_extraction_with_nmf_lda.html" />

  
  <link rel="shortcut icon" href="../../_static/favicon.ico"/>
  

  <link rel="stylesheet" href="../../_static/css/vendor/bootstrap.min.css" type="text/css" />
  <link rel="stylesheet" href="../../_static/gallery.css" type="text/css" />
  <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<script id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
<script src="../../_static/jquery.js"></script> 
</head>
<body>
<nav id="navbar" class="sk-docs-navbar navbar navbar-expand-md navbar-light bg-light py-0">
  <div class="container-fluid sk-docs-container px-0">
      <a class="navbar-brand py-0" href="../../index.html">
        <img
          class="sk-brand-img"
          src="../../_static/scikit-learn-logo-small.png"
          alt="logo"/>
      </a>
    <button
      id="sk-navbar-toggler"
      class="navbar-toggler"
      type="button"
      data-toggle="collapse"
      data-target="#navbarSupportedContent"
      aria-controls="navbarSupportedContent"
      aria-expanded="false"
      aria-label="Toggle navigation"
    >
      <span class="navbar-toggler-icon"></span>
    </button>

    <div class="sk-navbar-collapse collapse navbar-collapse" id="navbarSupportedContent">
      <ul class="navbar-nav mr-auto">
        <li class="nav-item">
          <a class="sk-nav-link nav-link" href="../../install.html">Install</a>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link" href="../../user_guide.html">User Guide</a>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link" href="../../modules/classes.html">API</a>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link" href="../index.html">Examples</a>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link nav-more-item-mobile-items" href="../../getting_started.html">Getting Started</a>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link nav-more-item-mobile-items" href="../../tutorial/index.html">Tutorial</a>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link nav-more-item-mobile-items" href="../../glossary.html">Glossary</a>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link nav-more-item-mobile-items" href="../../developers/index.html">Development</a>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link nav-more-item-mobile-items" href="../../faq.html">FAQ</a>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link nav-more-item-mobile-items" href="../../related_projects.html">Related packages</a>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link nav-more-item-mobile-items" href="../../roadmap.html">Roadmap</a>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link nav-more-item-mobile-items" href="../../about.html">About us</a>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link nav-more-item-mobile-items" href="https://github.com/scikit-learn/scikit-learn">GitHub</a>
        </li>
        <li class="nav-item">
          <a class="sk-nav-link nav-link nav-more-item-mobile-items" href="https://scikit-learn.org/dev/versions.html">Other Versions</a>
        </li>
        <li class="nav-item dropdown nav-more-item-dropdown">
          <a class="sk-nav-link nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">More</a>
          <div class="dropdown-menu" aria-labelledby="navbarDropdown">
              <a class="sk-nav-dropdown-item dropdown-item" href="../../getting_started.html">Getting Started</a>
              <a class="sk-nav-dropdown-item dropdown-item" href="../../tutorial/index.html">Tutorial</a>
              <a class="sk-nav-dropdown-item dropdown-item" href="../../glossary.html">Glossary</a>
              <a class="sk-nav-dropdown-item dropdown-item" href="../../developers/index.html">Development</a>
              <a class="sk-nav-dropdown-item dropdown-item" href="../../faq.html">FAQ</a>
              <a class="sk-nav-dropdown-item dropdown-item" href="../../related_projects.html">Related packages</a>
              <a class="sk-nav-dropdown-item dropdown-item" href="../../roadmap.html">Roadmap</a>
              <a class="sk-nav-dropdown-item dropdown-item" href="../../about.html">About us</a>
              <a class="sk-nav-dropdown-item dropdown-item" href="https://github.com/scikit-learn/scikit-learn">GitHub</a>
              <a class="sk-nav-dropdown-item dropdown-item" href="https://scikit-learn.org/dev/versions.html">Other Versions</a>
          </div>
        </li>
      </ul>
      <div id="searchbox" role="search">
          <div class="searchformwrapper">
          <form class="search" action="../../search.html" method="get">
            <input class="sk-search-text-input" type="text" name="q" aria-labelledby="searchlabel" />
            <input class="sk-search-text-btn" type="submit" value="Go" />
          </form>
          </div>
      </div>
    </div>
  </div>
</nav>
<div class="d-flex" id="sk-doc-wrapper">
    <input type="checkbox" name="sk-toggle-checkbox" id="sk-toggle-checkbox">
    <label id="sk-sidemenu-toggle" class="sk-btn-toggle-toc btn sk-btn-primary" for="sk-toggle-checkbox">Toggle Menu</label>
    <div id="sk-sidebar-wrapper" class="border-right">
      <div class="sk-sidebar-toc-wrapper">
        <div class="sk-sidebar-toc-logo">
          <a href="../../index.html">
            <img
              class="sk-brand-img"
              src="../../_static/scikit-learn-logo-small.png"
              alt="logo"/>
          </a>
        </div>
        <div class="btn-group w-100 mb-2" role="group" aria-label="rellinks">
            <a href="plot_tomography_l1_reconstruction.html" role="button" class="btn sk-btn-rellink py-1" sk-rellink-tooltip="Compressive sensing: tomography reconstruction with L1 prior (Lasso)">Prev</a><a href="../index.html" role="button" class="btn sk-btn-rellink py-1" sk-rellink-tooltip="Examples">Up</a>
            <a href="plot_face_recognition.html" role="button" class="btn sk-btn-rellink py-1" sk-rellink-tooltip="Faces recognition example using eigenfaces and SVMs">Next</a>
        </div>
        <div class="alert alert-danger p-1 mb-2" role="alert">
          <p class="text-center mb-0">
          <strong>scikit-learn 0.22</strong><br/>
          <a href="http://scikit-learn.org/dev/versions.html">Other versions</a>
          </p>
        </div>
        <div class="alert alert-warning p-1 mb-2" role="alert">
          <p class="text-center mb-0">
            Please <a class="font-weight-bold" href="../../about.html#citing-scikit-learn"><string>cite us</string></a> if you use the software.
          </p>
        </div>
          <div class="sk-sidebar-toc">
            <ul>
<li><a class="reference internal" href="#">Topic extraction with Non-negative Matrix Factorization and Latent Dirichlet Allocation</a></li>
</ul>

          </div>
      </div>
    </div>
    <div id="sk-page-content-wrapper">
      <div class="sk-page-content container-fluid body px-md-3" role="main">
        
  <div class="sphx-glr-download-link-note admonition note">
<p class="admonition-title">Note</p>
<p>Click <a class="reference internal" href="#sphx-glr-download-auto-examples-applications-plot-topics-extraction-with-nmf-lda-py"><span class="std std-ref">here</span></a> to download the full example code or to run this example in your browser via Binder</p>
</div>
<div class="sphx-glr-example-title section" id="topic-extraction-with-non-negative-matrix-factorization-and-latent-dirichlet-allocation">
<span id="sphx-glr-auto-examples-applications-plot-topics-extraction-with-nmf-lda-py"></span><h1>Topic extraction with Non-negative Matrix Factorization and Latent Dirichlet Allocation<a class="headerlink" href="#topic-extraction-with-non-negative-matrix-factorization-and-latent-dirichlet-allocation" title="Permalink to this headline">¶</a></h1>
<p>This is an example of applying <a class="reference internal" href="../../modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF" title="sklearn.decomposition.NMF"><code class="xref py py-class docutils literal notranslate"><span class="pre">sklearn.decomposition.NMF</span></code></a> and
<a class="reference internal" href="../../modules/generated/sklearn.decomposition.LatentDirichletAllocation.html#sklearn.decomposition.LatentDirichletAllocation" title="sklearn.decomposition.LatentDirichletAllocation"><code class="xref py py-class docutils literal notranslate"><span class="pre">sklearn.decomposition.LatentDirichletAllocation</span></code></a> on a corpus
of documents and extract additive models of the topic structure of the
corpus.  The output is a list of topics, each represented as a list of
terms (weights are not shown).</p>
<p>Non-negative Matrix Factorization is applied with two different objective
functions: the Frobenius norm, and the generalized Kullback-Leibler divergence.
The latter is equivalent to Probabilistic Latent Semantic Indexing.</p>
<p>The default parameters (n_samples / n_features / n_components) should make
the example runnable in a couple of tens of seconds. You can try to
increase the dimensions of the problem, but be aware that the time
complexity is polynomial in NMF. In LDA, the time complexity is
proportional to (n_samples * iterations).</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="c1"># Author: Olivier Grisel &lt;olivier.grisel@ensta.org&gt;</span>
<span class="c1">#         Lars Buitinck</span>
<span class="c1">#         Chyi-Kwei Yau &lt;chyikwei.yau@gmail.com&gt;</span>
<span class="c1"># License: BSD 3 clause</span>

<span class="kn">from</span> <span class="nn">time</span> <span class="kn">import</span> <span class="n">time</span>

<span class="kn">from</span> <span class="nn">sklearn.feature_extraction.text</span> <span class="kn">import</span> <span class="n">TfidfVectorizer</span><span class="p">,</span> <span class="n">CountVectorizer</span>
<span class="kn">from</span> <span class="nn">sklearn.decomposition</span> <span class="kn">import</span> <span class="n">NMF</span><span class="p">,</span> <span class="n">LatentDirichletAllocation</span>
<span class="kn">from</span> <span class="nn">sklearn.datasets</span> <span class="kn">import</span> <span class="n">fetch_20newsgroups</span>

<span class="n">n_samples</span> <span class="o">=</span> <span class="mi">2000</span>
<span class="n">n_features</span> <span class="o">=</span> <span class="mi">1000</span>
<span class="n">n_components</span> <span class="o">=</span> <span class="mi">10</span>
<span class="n">n_top_words</span> <span class="o">=</span> <span class="mi">20</span>


<span class="k">def</span> <span class="nf">print_top_words</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">feature_names</span><span class="p">,</span> <span class="n">n_top_words</span><span class="p">):</span>
    <span class="k">for</span> <span class="n">topic_idx</span><span class="p">,</span> <span class="n">topic</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">components_</span><span class="p">):</span>
        <span class="n">message</span> <span class="o">=</span> <span class="s2">&quot;Topic #</span><span class="si">%d</span><span class="s2">: &quot;</span> <span class="o">%</span> <span class="n">topic_idx</span>
        <span class="n">message</span> <span class="o">+=</span> <span class="s2">&quot; &quot;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">feature_names</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
                             <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">topic</span><span class="o">.</span><span class="n">argsort</span><span class="p">()[:</span><span class="o">-</span><span class="n">n_top_words</span> <span class="o">-</span> <span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]])</span>
        <span class="nb">print</span><span class="p">(</span><span class="n">message</span><span class="p">)</span>
    <span class="nb">print</span><span class="p">()</span>


<span class="c1"># Load the 20 newsgroups dataset and vectorize it. We use a few heuristics</span>
<span class="c1"># to filter out useless terms early on: the posts are stripped of headers,</span>
<span class="c1"># footers and quoted replies, and common English words, words occurring in</span>
<span class="c1"># only one document or in at least 95% of the documents are removed.</span>

<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Loading dataset...&quot;</span><span class="p">)</span>
<span class="n">t0</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
<span class="n">data</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">fetch_20newsgroups</span><span class="p">(</span><span class="n">shuffle</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
                             <span class="n">remove</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;headers&#39;</span><span class="p">,</span> <span class="s1">&#39;footers&#39;</span><span class="p">,</span> <span class="s1">&#39;quotes&#39;</span><span class="p">),</span>
                             <span class="n">return_X_y</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">data_samples</span> <span class="o">=</span> <span class="n">data</span><span class="p">[:</span><span class="n">n_samples</span><span class="p">]</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;done in </span><span class="si">%0.3f</span><span class="s2">s.&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">t0</span><span class="p">))</span>

<span class="c1"># Use tf-idf features for NMF.</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Extracting tf-idf features for NMF...&quot;</span><span class="p">)</span>
<span class="n">tfidf_vectorizer</span> <span class="o">=</span> <span class="n">TfidfVectorizer</span><span class="p">(</span><span class="n">max_df</span><span class="o">=</span><span class="mf">0.95</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span>
                                   <span class="n">max_features</span><span class="o">=</span><span class="n">n_features</span><span class="p">,</span>
                                   <span class="n">stop_words</span><span class="o">=</span><span class="s1">&#39;english&#39;</span><span class="p">)</span>
<span class="n">t0</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
<span class="n">tfidf</span> <span class="o">=</span> <span class="n">tfidf_vectorizer</span><span class="o">.</span><span class="n">fit_transform</span><span class="p">(</span><span class="n">data_samples</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;done in </span><span class="si">%0.3f</span><span class="s2">s.&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">t0</span><span class="p">))</span>

<span class="c1"># Use tf (raw term count) features for LDA.</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Extracting tf features for LDA...&quot;</span><span class="p">)</span>
<span class="n">tf_vectorizer</span> <span class="o">=</span> <span class="n">CountVectorizer</span><span class="p">(</span><span class="n">max_df</span><span class="o">=</span><span class="mf">0.95</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span>
                                <span class="n">max_features</span><span class="o">=</span><span class="n">n_features</span><span class="p">,</span>
                                <span class="n">stop_words</span><span class="o">=</span><span class="s1">&#39;english&#39;</span><span class="p">)</span>
<span class="n">t0</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
<span class="n">tf</span> <span class="o">=</span> <span class="n">tf_vectorizer</span><span class="o">.</span><span class="n">fit_transform</span><span class="p">(</span><span class="n">data_samples</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;done in </span><span class="si">%0.3f</span><span class="s2">s.&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">t0</span><span class="p">))</span>
<span class="nb">print</span><span class="p">()</span>

<span class="c1"># Fit the NMF model</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Fitting the NMF model (Frobenius norm) with tf-idf features, &quot;</span>
      <span class="s2">&quot;n_samples=</span><span class="si">%d</span><span class="s2"> and n_features=</span><span class="si">%d</span><span class="s2">...&quot;</span>
      <span class="o">%</span> <span class="p">(</span><span class="n">n_samples</span><span class="p">,</span> <span class="n">n_features</span><span class="p">))</span>
<span class="n">t0</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
<span class="n">nmf</span> <span class="o">=</span> <span class="n">NMF</span><span class="p">(</span><span class="n">n_components</span><span class="o">=</span><span class="n">n_components</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
          <span class="n">alpha</span><span class="o">=.</span><span class="mi">1</span><span class="p">,</span> <span class="n">l1_ratio</span><span class="o">=.</span><span class="mi">5</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">tfidf</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;done in </span><span class="si">%0.3f</span><span class="s2">s.&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">t0</span><span class="p">))</span>

<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;</span><span class="se">\n</span><span class="s2">Topics in NMF model (Frobenius norm):&quot;</span><span class="p">)</span>
<span class="n">tfidf_feature_names</span> <span class="o">=</span> <span class="n">tfidf_vectorizer</span><span class="o">.</span><span class="n">get_feature_names</span><span class="p">()</span>
<span class="n">print_top_words</span><span class="p">(</span><span class="n">nmf</span><span class="p">,</span> <span class="n">tfidf_feature_names</span><span class="p">,</span> <span class="n">n_top_words</span><span class="p">)</span>

<span class="c1"># Fit the NMF model</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Fitting the NMF model (generalized Kullback-Leibler divergence) with &quot;</span>
      <span class="s2">&quot;tf-idf features, n_samples=</span><span class="si">%d</span><span class="s2"> and n_features=</span><span class="si">%d</span><span class="s2">...&quot;</span>
      <span class="o">%</span> <span class="p">(</span><span class="n">n_samples</span><span class="p">,</span> <span class="n">n_features</span><span class="p">))</span>
<span class="n">t0</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
<span class="n">nmf</span> <span class="o">=</span> <span class="n">NMF</span><span class="p">(</span><span class="n">n_components</span><span class="o">=</span><span class="n">n_components</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
          <span class="n">beta_loss</span><span class="o">=</span><span class="s1">&#39;kullback-leibler&#39;</span><span class="p">,</span> <span class="n">solver</span><span class="o">=</span><span class="s1">&#39;mu&#39;</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=.</span><span class="mi">1</span><span class="p">,</span>
          <span class="n">l1_ratio</span><span class="o">=.</span><span class="mi">5</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">tfidf</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;done in </span><span class="si">%0.3f</span><span class="s2">s.&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">t0</span><span class="p">))</span>

<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;</span><span class="se">\n</span><span class="s2">Topics in NMF model (generalized Kullback-Leibler divergence):&quot;</span><span class="p">)</span>
<span class="n">tfidf_feature_names</span> <span class="o">=</span> <span class="n">tfidf_vectorizer</span><span class="o">.</span><span class="n">get_feature_names</span><span class="p">()</span>
<span class="n">print_top_words</span><span class="p">(</span><span class="n">nmf</span><span class="p">,</span> <span class="n">tfidf_feature_names</span><span class="p">,</span> <span class="n">n_top_words</span><span class="p">)</span>

<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Fitting LDA models with tf features, &quot;</span>
      <span class="s2">&quot;n_samples=</span><span class="si">%d</span><span class="s2"> and n_features=</span><span class="si">%d</span><span class="s2">...&quot;</span>
      <span class="o">%</span> <span class="p">(</span><span class="n">n_samples</span><span class="p">,</span> <span class="n">n_features</span><span class="p">))</span>
<span class="n">lda</span> <span class="o">=</span> <span class="n">LatentDirichletAllocation</span><span class="p">(</span><span class="n">n_components</span><span class="o">=</span><span class="n">n_components</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span>
                                <span class="n">learning_method</span><span class="o">=</span><span class="s1">&#39;online&#39;</span><span class="p">,</span>
                                <span class="n">learning_offset</span><span class="o">=</span><span class="mf">50.</span><span class="p">,</span>
                                <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">t0</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
<span class="n">lda</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">tf</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;done in </span><span class="si">%0.3f</span><span class="s2">s.&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">t0</span><span class="p">))</span>

<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;</span><span class="se">\n</span><span class="s2">Topics in LDA model:&quot;</span><span class="p">)</span>
<span class="n">tf_feature_names</span> <span class="o">=</span> <span class="n">tf_vectorizer</span><span class="o">.</span><span class="n">get_feature_names</span><span class="p">()</span>
<span class="n">print_top_words</span><span class="p">(</span><span class="n">lda</span><span class="p">,</span> <span class="n">tf_feature_names</span><span class="p">,</span> <span class="n">n_top_words</span><span class="p">)</span>
</pre></div>
</div>
<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 0 minutes  0.000 seconds)</p>
<div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-auto-examples-applications-plot-topics-extraction-with-nmf-lda-py">
<div class="binder-badge docutils container">
<a class="reference external image-reference" href="https://mybinder.org/v2/gh/scikit-learn/scikit-learn/0.22.X?urlpath=lab/tree/notebooks/auto_examples/applications/plot_topics_extraction_with_nmf_lda.ipynb"><img alt="https://mybinder.org/badge_logo.svg" src="https://mybinder.org/badge_logo.svg" width="150px" /></a>
</div>
<div class="sphx-glr-download docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/10aacf585f645eed063e86526e855375/plot_topics_extraction_with_nmf_lda.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">plot_topics_extraction_with_nmf_lda.py</span></code></a></p>
</div>
<div class="sphx-glr-download docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/69eb1f8b440e37562938ff7446f25313/plot_topics_extraction_with_nmf_lda.ipynb"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Jupyter</span> <span class="pre">notebook:</span> <span class="pre">plot_topics_extraction_with_nmf_lda.ipynb</span></code></a></p>
</div>
</div>
<p class="sphx-glr-signature"><a class="reference external" href="https://sphinx-gallery.github.io">Gallery generated by Sphinx-Gallery</a></p>
</div>


      </div>
    <div class="container">
      <footer class="sk-content-footer">
            &copy; 2007 - 2019, scikit-learn developers (BSD License).
          <a href="../../_sources/auto_examples/applications/plot_topics_extraction_with_nmf_lda.rst.txt" rel="nofollow">Show this page source</a>
      </footer>
    </div>
  </div>
</div>
<script src="../../_static/js/vendor/bootstrap.min.js"></script>

<script>
    window.ga=window.ga||function(){(ga.q=ga.q||[]).push(arguments)};ga.l=+new Date;
    ga('create', 'UA-22606712-2', 'auto');
    ga('set', 'anonymizeIp', true);
    ga('send', 'pageview');
</script>
<script async src='https://www.google-analytics.com/analytics.js'></script>


<script>
$(document).ready(function() {
    /* Add a [>>>] button on the top-right corner of code samples to hide
     * the >>> and ... prompts and the output and thus make the code
     * copyable. */
    var div = $('.highlight-python .highlight,' +
                '.highlight-python3 .highlight,' +
                '.highlight-pycon .highlight,' +
		'.highlight-default .highlight')
    var pre = div.find('pre');

    // get the styles from the current theme
    pre.parent().parent().css('position', 'relative');
    var hide_text = 'Hide prompts and outputs';
    var show_text = 'Show prompts and outputs';

    // create and add the button to all the code blocks that contain >>>
    div.each(function(index) {
        var jthis = $(this);
        if (jthis.find('.gp').length > 0) {
            var button = $('<span class="copybutton">&gt;&gt;&gt;</span>');
            button.attr('title', hide_text);
            button.data('hidden', 'false');
            jthis.prepend(button);
        }
        // tracebacks (.gt) contain bare text elements that need to be
        // wrapped in a span to work with .nextUntil() (see later)
        jthis.find('pre:has(.gt)').contents().filter(function() {
            return ((this.nodeType == 3) && (this.data.trim().length > 0));
        }).wrap('<span>');
    });

    // define the behavior of the button when it's clicked
    $('.copybutton').click(function(e){
        e.preventDefault();
        var button = $(this);
        if (button.data('hidden') === 'false') {
            // hide the code output
            button.parent().find('.go, .gp, .gt').hide();
            button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'hidden');
            button.css('text-decoration', 'line-through');
            button.attr('title', show_text);
            button.data('hidden', 'true');
        } else {
            // show the code output
            button.parent().find('.go, .gp, .gt').show();
            button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'visible');
            button.css('text-decoration', 'none');
            button.attr('title', hide_text);
            button.data('hidden', 'false');
        }
    });

	/*** Add permalink buttons next to glossary terms ***/
	$('dl.glossary > dt[id]').append(function() {
		return ('<a class="headerlink" href="#' +
			    this.getAttribute('id') +
			    '" title="Permalink to this term">¶</a>');
	});
  /*** Hide navbar when scrolling down ***/
  // Returns true when headerlink target matches hash in url
  (function() {
    hashTargetOnTop = function() {
        var hash = window.location.hash;
        if ( hash.length < 2 ) { return false; }

        var target = document.getElementById( hash.slice(1) );
        if ( target === null ) { return false; }

        var top = target.getBoundingClientRect().top;
        return (top < 2) && (top > -2);
    };

    // Hide navbar on load if hash target is on top
    var navBar = document.getElementById("navbar");
    var navBarToggler = document.getElementById("sk-navbar-toggler");
    var navBarHeightHidden = "-" + navBar.getBoundingClientRect().height + "px";
    var $window = $(window);

    hideNavBar = function() {
        navBar.style.top = navBarHeightHidden;
    };

    showNavBar = function() {
        navBar.style.top = "0";
    }

    if (hashTargetOnTop()) {
        hideNavBar()
    }

    var prevScrollpos = window.pageYOffset;
    hideOnScroll = function(lastScrollTop) {
        if (($window.width() < 768) && (navBarToggler.getAttribute("aria-expanded") === 'true')) {
            return;
        }
        if (lastScrollTop > 2 && (prevScrollpos <= lastScrollTop) || hashTargetOnTop()){
            hideNavBar()
        } else {
            showNavBar()
        }
        prevScrollpos = lastScrollTop;
    };

    /*** high preformance scroll event listener***/
    var raf = window.requestAnimationFrame ||
        window.webkitRequestAnimationFrame ||
        window.mozRequestAnimationFrame ||
        window.msRequestAnimationFrame ||
        window.oRequestAnimationFrame;
    var lastScrollTop = $window.scrollTop();

    if (raf) {
        loop();
    }

    function loop() {
        var scrollTop = $window.scrollTop();
        if (lastScrollTop === scrollTop) {
            raf(loop);
            return;
        } else {
            lastScrollTop = scrollTop;
            hideOnScroll(lastScrollTop);
            raf(loop);
        }
    }
  })();
});

</script>
    
<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js"></script>
    
</body>
</html>